{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.439885Z",
     "start_time": "2020-04-05T08:14:49.715072Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.metrics import cohen_kappa_score, accuracy_score, mean_absolute_error, f1_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.567387Z",
     "start_time": "2020-04-05T08:14:50.443140Z"
    }
   },
   "outputs": [],
   "source": [
    "# Read the per-model OOF (presumably out-of-fold) prediction files, lgb first.\n",
    "df_oof_lgb, df_oof_xgb = (\n",
    "    pd.read_csv(oof_path) for oof_path in ('lgb_oof.csv', 'xgb_oof.csv')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.572374Z",
     "start_time": "2020-04-05T08:14:50.569026Z"
    }
   },
   "outputs": [],
   "source": [
    "# Relabel the three columns of each frame by position so that the prediction\n",
    "# column carries its model's name before the frames are joined on SaleID.\n",
    "for oof_frame, pred_name in ((df_oof_lgb, 'lgb_pred'), (df_oof_xgb, 'xgb_pred')):\n",
    "    oof_frame.columns = ['SaleID', 'price', pred_name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.593432Z",
     "start_time": "2020-04-05T08:14:50.573597Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>price</th>\n",
       "      <th>lgb_pred</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>3612.5217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>907.5410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>1600.0</td>\n",
       "      <td>1358.7361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16</td>\n",
       "      <td>10500.0</td>\n",
       "      <td>23906.9300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>23</td>\n",
       "      <td>599.0</td>\n",
       "      <td>937.7173</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SaleID    price    lgb_pred\n",
       "0       3   2400.0   3612.5217\n",
       "1       7   1000.0    907.5410\n",
       "2      12   1600.0   1358.7361\n",
       "3      16  10500.0  23906.9300\n",
       "4      23    599.0    937.7173"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the relabelled lgb OOF frame.\n",
    "df_oof_lgb.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.664895Z",
     "start_time": "2020-04-05T08:14:50.594917Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>price</th>\n",
       "      <th>xgb_pred</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>2984.63530</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1001.40240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>1600.0</td>\n",
       "      <td>1649.81560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16</td>\n",
       "      <td>10500.0</td>\n",
       "      <td>10485.24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>23</td>\n",
       "      <td>599.0</td>\n",
       "      <td>541.96155</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SaleID    price     xgb_pred\n",
       "0       3   2400.0   2984.63530\n",
       "1       7   1000.0   1001.40240\n",
       "2      12   1600.0   1649.81560\n",
       "3      16  10500.0  10485.24000\n",
       "4      23    599.0    541.96155"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the relabelled xgb OOF frame.\n",
    "df_oof_xgb.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.722303Z",
     "start_time": "2020-04-05T08:14:50.668401Z"
    }
   },
   "outputs": [],
   "source": [
    "# Join the lgb predictions onto the xgb frame by SaleID.\n",
    "# The duplicated `price` column is dropped from a copy of the lgb frame rather\n",
    "# than deleted in place: `del df_oof_lgb['price']` raised KeyError whenever this\n",
    "# cell was re-run and silently changed a frame that an earlier cell displayed.\n",
    "# `validate` makes the merge fail loudly if SaleID is duplicated on either side.\n",
    "df_oof = df_oof_xgb.merge(\n",
    "    df_oof_lgb.drop(columns='price'),\n",
    "    on='SaleID',\n",
    "    how='left',\n",
    "    validate='one_to_one',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.734646Z",
     "start_time": "2020-04-05T08:14:50.723983Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>price</th>\n",
       "      <th>xgb_pred</th>\n",
       "      <th>lgb_pred</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>2400.0</td>\n",
       "      <td>2984.63530</td>\n",
       "      <td>3612.5217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>1001.40240</td>\n",
       "      <td>907.5410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>12</td>\n",
       "      <td>1600.0</td>\n",
       "      <td>1649.81560</td>\n",
       "      <td>1358.7361</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>16</td>\n",
       "      <td>10500.0</td>\n",
       "      <td>10485.24000</td>\n",
       "      <td>23906.9300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>23</td>\n",
       "      <td>599.0</td>\n",
       "      <td>541.96155</td>\n",
       "      <td>937.7173</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SaleID    price     xgb_pred    lgb_pred\n",
       "0       3   2400.0   2984.63530   3612.5217\n",
       "1       7   1000.0   1001.40240    907.5410\n",
       "2      12   1600.0   1649.81560   1358.7361\n",
       "3      16  10500.0  10485.24000  23906.9300\n",
       "4      23    599.0    541.96155    937.7173"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the merged OOF frame (price, xgb_pred and lgb_pred side by side).\n",
    "df_oof.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.742810Z",
     "start_time": "2020-04-05T08:14:50.736127Z"
    }
   },
   "outputs": [],
   "source": [
    "# Weighted blend of the OOF predictions: 0.45 on xgb_pred, 0.55 on lgb_pred.\n",
    "oof_xgb_part = 0.45 * df_oof['xgb_pred']\n",
    "oof_lgb_part = 0.55 * df_oof['lgb_pred']\n",
    "df_oof['pred'] = oof_xgb_part + oof_lgb_part"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.804840Z",
     "start_time": "2020-04-05T08:14:50.744104Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mae: 2078.774093639886\n"
     ]
    }
   ],
   "source": [
    "# Mean absolute error of the blended `pred` column against the `price` column.\n",
    "mae = mean_absolute_error(y_true=df_oof['price'], y_pred=df_oof['pred'])\n",
    "print('mae:', mae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.842161Z",
     "start_time": "2020-04-05T08:14:50.806468Z"
    }
   },
   "outputs": [],
   "source": [
    "# Read each model's submission predictions, lgb first.\n",
    "df_sub_lgb, df_sub_xgb = (\n",
    "    pd.read_csv(sub_path) for sub_path in ('lgb.csv', 'xgb.csv')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.901549Z",
     "start_time": "2020-04-05T08:14:50.843924Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>150000</td>\n",
       "      <td>32203.112279</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>150001</td>\n",
       "      <td>327.954178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>150002</td>\n",
       "      <td>5767.217929</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>150003</td>\n",
       "      <td>11524.863677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>150004</td>\n",
       "      <td>640.850384</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SaleID         price\n",
       "0  150000  32203.112279\n",
       "1  150001    327.954178\n",
       "2  150002   5767.217929\n",
       "3  150003  11524.863677\n",
       "4  150004    640.850384"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the xgb submission frame as loaded, before its columns are renamed.\n",
    "df_sub_xgb.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.907106Z",
     "start_time": "2020-04-05T08:14:50.903285Z"
    }
   },
   "outputs": [],
   "source": [
    "# Relabel the two columns of each submission frame by position so that the\n",
    "# prediction column carries its model's name before the frames are joined.\n",
    "for sub_frame, pred_name in ((df_sub_lgb, 'lgb_pred'), (df_sub_xgb, 'xgb_pred')):\n",
    "    sub_frame.columns = ['SaleID', pred_name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.972277Z",
     "start_time": "2020-04-05T08:14:50.908959Z"
    }
   },
   "outputs": [],
   "source": [
    "# Join the lgb predictions onto the xgb submission frame by SaleID.\n",
    "# `validate` rejects duplicated SaleIDs on either side, and the assertion stops\n",
    "# the left join from silently leaving NaN in `lgb_pred` (which would propagate\n",
    "# into the blended price) when the two files do not cover the same SaleIDs.\n",
    "df_sub = df_sub_xgb.merge(df_sub_lgb, on='SaleID', how='left', validate='one_to_one')\n",
    "assert df_sub['lgb_pred'].notna().all(), 'lgb_pred contains missing values after merging on SaleID'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:50.986628Z",
     "start_time": "2020-04-05T08:14:50.973870Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SaleID</th>\n",
       "      <th>xgb_pred</th>\n",
       "      <th>lgb_pred</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>150000</td>\n",
       "      <td>32203.112279</td>\n",
       "      <td>31630.973295</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>150001</td>\n",
       "      <td>327.954178</td>\n",
       "      <td>332.198279</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>150002</td>\n",
       "      <td>5767.217929</td>\n",
       "      <td>5748.966332</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>150003</td>\n",
       "      <td>11524.863677</td>\n",
       "      <td>11208.232378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>150004</td>\n",
       "      <td>640.850384</td>\n",
       "      <td>646.683380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49995</th>\n",
       "      <td>199995</td>\n",
       "      <td>2917.859148</td>\n",
       "      <td>3014.241714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49996</th>\n",
       "      <td>199996</td>\n",
       "      <td>1493.855060</td>\n",
       "      <td>1399.720483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49997</th>\n",
       "      <td>199997</td>\n",
       "      <td>7177.348285</td>\n",
       "      <td>7543.108677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49998</th>\n",
       "      <td>199998</td>\n",
       "      <td>10366.108635</td>\n",
       "      <td>10010.799392</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49999</th>\n",
       "      <td>199999</td>\n",
       "      <td>3479.532179</td>\n",
       "      <td>3467.922614</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>50000 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       SaleID      xgb_pred      lgb_pred\n",
       "0      150000  32203.112279  31630.973295\n",
       "1      150001    327.954178    332.198279\n",
       "2      150002   5767.217929   5748.966332\n",
       "3      150003  11524.863677  11208.232378\n",
       "4      150004    640.850384    646.683380\n",
       "...       ...           ...           ...\n",
       "49995  199995   2917.859148   3014.241714\n",
       "49996  199996   1493.855060   1399.720483\n",
       "49997  199997   7177.348285   7543.108677\n",
       "49998  199998  10366.108635  10010.799392\n",
       "49999  199999   3479.532179   3467.922614\n",
       "\n",
       "[50000 rows x 3 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the merged submission frame (pandas truncates the view and reports its shape).\n",
    "df_sub[['SaleID', 'xgb_pred', 'lgb_pred']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:51.043674Z",
     "start_time": "2020-04-05T08:14:50.987987Z"
    }
   },
   "outputs": [],
   "source": [
    "# Blend the submission predictions with the same 0.45 / 0.55 weights that were\n",
    "# scored on the OOF frame; the result is stored in the `price` column.\n",
    "sub_xgb_part = 0.45 * df_sub['xgb_pred']\n",
    "sub_lgb_part = 0.55 * df_sub['lgb_pred']\n",
    "df_sub['price'] = sub_xgb_part + sub_lgb_part"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-04-05T08:14:51.485330Z",
     "start_time": "2020-04-05T08:14:51.045408Z"
    }
   },
   "outputs": [],
   "source": [
    "# Write only SaleID and the blended price, i.e. the same two-column layout that\n",
    "# xgb.csv was loaded with. Previously the intermediate xgb_pred / lgb_pred\n",
    "# columns were written to the file as well.\n",
    "# NOTE(review): assumes consumers of ronghe.csv expect SaleID,price - confirm.\n",
    "df_sub[['SaleID', 'price']].to_csv('ronghe.csv', index=False, encoding='utf-8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:dm] *",
   "language": "python",
   "name": "conda-env-dm-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
